by: Yana Kryshchuk
import altair as alt
import pandas as pd
from vega_datasets import data
# Lift Altair's default row limit so the whole dataset can be embedded in the
# charts built from it.
alt.data_transformers.disable_max_rows()
# NOTE: a bare `DataTransformerRegistry.enable('default')` statement used to
# follow here. That name is never imported, so it raised NameError; it appears
# to be the echoed return value of the call above rather than real code.

# Load the raw event table.
data_ = pd.read_csv("all_month.csv")

# Keep only the rows whose type is "earthquake". The explicit copy makes the
# column assignments below write to an independent frame instead of a slice of
# the unfiltered one (avoids pandas' SettingWithCopyWarning).
data_ = data_[data_["type"] == "earthquake"].copy()

# Preserve the original row labels as a regular column.
data_["index_"] = data_.index

# Parse timestamps, then derive the calendar labels from them.
data_["time"] = pd.to_datetime(data_["time"])
data_["day_of_the_week"] = data_["time"].dt.day_name()
data_["month"] = data_["time"].dt.month_name()
For this kind of insight I'm using a layered binned histogram, as it gives us a better picture of the data and its quantitative patterns.
Even though we had a significantly smaller amount of data in February than in January, we are still able to analyze the difference in the magnitude distributions.
# Layered step histogram of magnitudes: one semi-transparent area per month.
hist_magnitude_axis = alt.X('mag:Q', bin=alt.Bin(maxbins=25), title="Magnitude")
# stack=None leaves the monthly areas overlapping instead of stacking them.
hist_count_axis = alt.Y('count()', stack=None, title="No. of earthquakes")
hist_month_color = alt.Color(
    "month:N",
    scale=alt.Scale(scheme='greenblue'),
    sort=["January"],
    title="Month",
)
hist_tooltip = [
    alt.Tooltip("month", title="Month"),
    alt.Tooltip('count()', title='No. of earthquakes'),
]

alt.Chart(data_).mark_area(opacity=0.5, interpolate='step').encode(
    x=hist_magnitude_axis,
    y=hist_count_axis,
    color=hist_month_color,
    tooltip=hist_tooltip,
).properties(width=600, height=500, title="Magnitude distribution")
For the analysis of the time-series data I used both standard line plots and heatmaps, as they are the most suitable options for the type of data we have.
# Line chart (with point markers) of the number of events per calendar day.
daily_date_axis = alt.X('yearmonthdate(time):O', title="Date")
daily_count_axis = alt.Y('count():Q', title="No. of earthquakes")
daily_tooltip = [
    alt.Tooltip('count()', title='No. of earthquakes'),
    alt.Tooltip("yearmonthdate(time)", title="Date"),
]

# Line and overlaid points share the same green colour.
daily_marks = alt.Chart(data_).mark_line(
    color="green",
    point=alt.OverlayMarkDef(color="green"),
)

daily_marks.encode(
    x=daily_date_axis,
    y=daily_count_axis,
    tooltip=daily_tooltip,
).properties(
    width=600,
    height=500,
    title="Changes in the number of earthquakes throughout the months",
)
# Heatmap of event counts: hour of the day on x, day of the week on y.
heat_hour_axis = alt.X('hours(time):O', title="Time of the day")
heat_weekday_axis = alt.Y('day(time):O', title="Day of the week")
heat_fill = alt.Color(
    'count():Q',
    title="No. of earthquakes",
    scale=alt.Scale(scheme='greenblue'),
)
heat_tooltip = [
    alt.Tooltip("count()", title="No. of earthquakes"),
    alt.Tooltip("hours(time)", title="Time of the day"),
    alt.Tooltip("day(time)", title="Day of the week"),
]

alt.Chart(data_).mark_rect().encode(
    x=heat_hour_axis,
    y=heat_weekday_axis,
    color=heat_fill,
    tooltip=heat_tooltip,
).properties(
    width=600,
    height=500,
    title="Changes in the number of earthquakes throughout the day",
)
For this kind of analysis it's better to use a world map, as we have location-based data.
The drawback of this kind of visualization is the inability to zoom in on the map and get a more detailed look. Ideally, in that scenario we'd use a choropleth map, as it allows the points to be grouped into regions.
# Both layers and the combined chart use the same canvas size.
map_size = {"width": 1000, "height": 500}

# Country outlines from the vega_datasets 110m world topology.
countries = alt.topo_feature(data.world_110m.url, 'countries')

# Background layer: light-gray countries with white borders.
base = (
    alt.Chart(countries)
    .mark_geoshape(fill='lightgray', stroke='white')
    .project('equirectangular')
    .properties(**map_size)
)

# Foreground layer: one small green circle per encoded longitude/latitude.
# NOTE(review): count() in the tooltip is aggregated per plotted coordinate
# pair, so it will presumably read 1 for most points - confirm this is intended.
earthquakes = (
    alt.Chart(data_)
    .mark_circle(color="green")
    .encode(
        longitude='longitude:Q',
        latitude='latitude:Q',
        size=alt.value(10),
        tooltip=[alt.Tooltip("count()", title="No. of earthquakes")],
    )
    .properties(**map_size)
)

# Overlay the event layer on the country layer.
(base + earthquakes).properties(
    title="Geographical location of the earthquakes",
    **map_size,
)